data <- read_csv("/Users/jorgevargasmutizabal/Desktop/Frog game statistics/R Analysis Frog/filtered_data.csv")
## New names:
## Rows: 21488 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (9): ...1, Player_ID, LexTale, Game_Version, Game_Level, Phrase_Conditio...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
str(data)
## spc_tbl_ [21,488 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ...1 : num [1:21488] 1 2 3 4 5 6 7 8 9 10 ...
## $ Player_ID : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
## $ LexTale : num [1:21488] 8 8 8 8 8 8 8 8 8 8 ...
## $ Game_Version : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
## $ Game_Level : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
## $ Phrase_Condition: num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
## $ Question_Num : num [1:21488] 0 1 2 3 4 5 6 7 8 9 ...
## $ Answer : num [1:21488] 1 1 1 1 1 1 1 1 0 1 ...
## $ Reaction_Time : num [1:21488] 1.99 0.8 1.8 0.77 0.82 0.86 0.82 0.85 0.78 1.06 ...
## - attr(*, "spec")=
## .. cols(
## .. ...1 = col_double(),
## .. Player_ID = col_double(),
## .. LexTale = col_double(),
## .. Game_Version = col_double(),
## .. Game_Level = col_double(),
## .. Phrase_Condition = col_double(),
## .. Question_Num = col_double(),
## .. Answer = col_double(),
## .. Reaction_Time = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
data <- data %>%
mutate(
Player_ID = as.factor(Player_ID), # Convert to factor
LexTale = as.numeric(LexTale), # Convert to numeric
Game_Version = as.factor(Game_Version), # Convert to factor
Game_Level = as.factor(Game_Level), # Convert to factor
Phrase_Condition = as.factor(Phrase_Condition), # Convert to factor
Question_Num = as.numeric(Question_Num), # Convert to numeric
Answer = as.numeric(Answer), # Convert to factor
Reaction_Time = as.numeric(Reaction_Time) # Convert to numeric
)
# Verify changes
str(data)
## tibble [21,488 × 9] (S3: tbl_df/tbl/data.frame)
## $ ...1 : num [1:21488] 1 2 3 4 5 6 7 8 9 10 ...
## $ Player_ID : Factor w/ 38 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LexTale : num [1:21488] 8 8 8 8 8 8 8 8 8 8 ...
## $ Game_Version : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ Game_Level : Factor w/ 8 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Phrase_Condition: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ Question_Num : num [1:21488] 0 1 2 3 4 5 6 7 8 9 ...
## $ Answer : num [1:21488] 1 1 1 1 1 1 1 1 0 1 ...
## $ Reaction_Time : num [1:21488] 1.99 0.8 1.8 0.77 0.82 0.86 0.82 0.85 0.78 1.06 ...
model1 <- lmer(Reaction_Time ~ 1 + (1 | Player_ID), data = data)
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + (1 | Player_ID)
## Data: data
##
## REML criterion at convergence: 22273.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4173 -0.7322 -0.1049 0.6110 3.5746
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.01319 0.1149
## Residual 0.16394 0.4049
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.83745 0.01885 37.08626 44.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Diagnostic plots
plot_residuals_diagnostics(model1)
## `geom_smooth()` using formula = 'y ~ x'
# Model diagnostics
check_model(model1)
model2 <- lmer(Reaction_Time ~ 1 + Question_Num + (1 + Question_Num | Player_ID), data = data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Model failed to converge with 1 negative eigenvalue: -2.5e+01
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Question_Num + (1 + Question_Num | Player_ID)
## Data: data
##
## REML criterion at convergence: 21846.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6398 -0.7207 -0.0994 0.6078 3.7181
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Player_ID (Intercept) 3.649e-01 0.604035
## Question_Num 2.582e-06 0.001607 -0.96
## Residual 1.591e-01 0.398879
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.713e-01 9.814e-02 2.145e+04 8.878 <2e-16 ***
## Question_Num -2.220e-04 2.620e-04 2.009e+01 -0.847 0.407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Question_Nm -0.962
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#Diagnostic plots
plot_residuals_diagnostics(model2)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model2, n = 500)
plot(residuals_simulation)
# Model diagnostics
check_model(model2)
# Check for influential cases potentially affecting the model
#influence_measures <- influence(model3)
#plot(influence_measures, which = "cook")
# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))
# Model interpretation - examining fixed and random effects
#fixed_effects <- fixef(model3)
#random_effects <- ranef(model3)
# Printing fixed effects for interpretation
#print(fixed_effects)
# Printing random effects for interpretation
#print(random_effects)
# Get estimated marginal means for specific values of Question_Num
emm_results <- emmeans(model2, specs = ~ Question_Num, at = list(Question_Num = seq(min(data$Question_Num), max(data$Question_Num), by = 10)))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
# Summarize the results
summary(emm_results)
## Question_Num emmean SE df asymp.LCL asymp.UCL
## 0 0.871 0.0981 Inf 0.679 1.064
## 10 0.869 0.0956 Inf 0.682 1.057
## 20 0.867 0.0931 Inf 0.684 1.049
## 30 0.865 0.0906 Inf 0.687 1.042
## 40 0.862 0.0881 Inf 0.690 1.035
## 50 0.860 0.0856 Inf 0.692 1.028
## 60 0.858 0.0831 Inf 0.695 1.021
## 70 0.856 0.0806 Inf 0.698 1.014
## 80 0.854 0.0782 Inf 0.700 1.007
## 90 0.851 0.0757 Inf 0.703 1.000
## 100 0.849 0.0733 Inf 0.706 0.993
## 110 0.847 0.0708 Inf 0.708 0.986
## 120 0.845 0.0684 Inf 0.711 0.979
## 130 0.842 0.0660 Inf 0.713 0.972
## 140 0.840 0.0636 Inf 0.716 0.965
## 150 0.838 0.0613 Inf 0.718 0.958
## 160 0.836 0.0589 Inf 0.720 0.951
## 170 0.834 0.0566 Inf 0.723 0.944
## 180 0.831 0.0543 Inf 0.725 0.938
## 190 0.829 0.0520 Inf 0.727 0.931
## 200 0.827 0.0498 Inf 0.729 0.925
## 210 0.825 0.0476 Inf 0.731 0.918
## 220 0.822 0.0454 Inf 0.733 0.912
## 230 0.820 0.0434 Inf 0.735 0.905
## 240 0.818 0.0413 Inf 0.737 0.899
## 250 0.816 0.0394 Inf 0.739 0.893
## 260 0.814 0.0375 Inf 0.740 0.887
## 270 0.811 0.0357 Inf 0.741 0.881
## 280 0.809 0.0340 Inf 0.743 0.876
## 290 0.807 0.0324 Inf 0.743 0.871
## 300 0.805 0.0310 Inf 0.744 0.866
## 310 0.803 0.0298 Inf 0.744 0.861
## 320 0.800 0.0287 Inf 0.744 0.857
## 330 0.798 0.0278 Inf 0.744 0.853
## 340 0.796 0.0272 Inf 0.743 0.849
## 350 0.794 0.0268 Inf 0.741 0.846
## 360 0.791 0.0267 Inf 0.739 0.844
## 370 0.789 0.0268 Inf 0.737 0.842
## 380 0.787 0.0271 Inf 0.734 0.840
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
# Plot the estimated marginal means
plot(emm_results)
# Create a plot of Reaction_Time vs. Question_Num with a regression line
ggplot(data, aes(x = Question_Num, y = Reaction_Time)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Reaction Time vs. Question Number",
x = "Question Number",
y = "Reaction Time")
## `geom_smooth()` using formula = 'y ~ x'
model3 <- lmer(Reaction_Time ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID), data = data)
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Phrase_Condition + (1 + Phrase_Condition |
## Player_ID)
## Data: data
##
## REML criterion at convergence: 17531.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2202 -0.6870 -0.1202 0.5792 4.1834
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Player_ID (Intercept) 0.01134 0.1065
## Phrase_Condition2 0.02389 0.1546 -0.29
## Phrase_Condition3 0.01391 0.1179 -0.30 0.57
## Phrase_Condition4 0.03883 0.1971 -0.35 0.64 0.32
## Residual 0.12971 0.3601
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.62506 0.01822 37.15675 34.306 < 2e-16 ***
## Phrase_Condition2 0.23231 0.02640 37.20209 8.799 1.27e-10 ***
## Phrase_Condition3 0.03446 0.02078 37.43797 1.659 0.105
## Phrase_Condition4 0.39736 0.03274 37.12133 12.137 1.73e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Phr_C2 Phr_C3
## Phrs_Cndtn2 -0.332
## Phrs_Cndtn3 -0.346 0.561
## Phrs_Cndtn4 -0.378 0.636 0.336
#Diagnostic plots
plot_residuals_diagnostics(model3)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model3, n = 500)
plot(residuals_simulation)
# Model diagnostics
check_model(model3)
# Check for influential cases potentially affecting the model
#influence_measures <- influence(model3)
#plot(influence_measures, which = "cook")
# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))
# Model interpretation - examining fixed and random effects
#fixed_effects <- fixef(model3)
#random_effects <- ranef(model3)
# Printing fixed effects for interpretation
#print(fixed_effects)
# Printing random effects for interpretation
#print(random_effects)
# Conducting pairwise comparisons of phrase conditions using estimated marginal means
emm_results <- emmeans(model3, specs = ~ Phrase_Condition)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm_results)
## Phrase_Condition emmean SE df asymp.LCL asymp.UCL
## 1 0.625 0.0182 Inf 0.589 0.661
## 2 0.857 0.0266 Inf 0.805 0.910
## 3 0.660 0.0224 Inf 0.616 0.703
## 4 1.022 0.0309 Inf 0.962 1.083
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
pairwise_comparisons <- emmeans(model3, specs = pairwise ~ Phrase_Condition)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(pairwise_comparisons)
## $emmeans
## Phrase_Condition emmean SE df asymp.LCL asymp.UCL
## 1 0.625 0.0182 Inf 0.589 0.661
## 2 0.857 0.0266 Inf 0.805 0.910
## 3 0.660 0.0224 Inf 0.616 0.703
## 4 1.022 0.0309 Inf 0.962 1.083
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## Phrase_Condition1 - Phrase_Condition2 -0.2323 0.0264 Inf -8.799 <.0001
## Phrase_Condition1 - Phrase_Condition3 -0.0345 0.0208 Inf -1.659 0.3457
## Phrase_Condition1 - Phrase_Condition4 -0.3974 0.0327 Inf -12.137 <.0001
## Phrase_Condition2 - Phrase_Condition3 0.1978 0.0227 Inf 8.734 <.0001
## Phrase_Condition2 - Phrase_Condition4 -0.1651 0.0259 Inf -6.383 <.0001
## Phrase_Condition3 - Phrase_Condition4 -0.3629 0.0323 Inf -11.220 <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 4 estimates
plot(emm_results)
# Create a boxplot
ggplot(data, aes(x = Phrase_Condition, y = Reaction_Time, fill = Phrase_Condition)) +
geom_boxplot() +
labs(title = "Reaction Times by Phrase Condition", x = "Phrase Condition", y = "Reaction Time (seconds)") +
theme_minimal()
# Calculate average reaction times per player per condition
avg_reaction_times <- data %>%
group_by(Player_ID, Phrase_Condition) %>%
summarise(Avg_Reaction_Time = mean(Reaction_Time), .groups = 'drop')
# Interaction plot
ggplot(avg_reaction_times, aes(x = Phrase_Condition, y = Avg_Reaction_Time, group = Player_ID, color = Phrase_Condition)) +
geom_line(alpha = 0.5) + # Slightly transparent to see overlapping lines
geom_point() +
labs(title = "Interaction of Phrase Condition and Player on Reaction Times", x = "Phrase Condition", y = "Average Reaction Time (seconds)") +
theme_minimal()
model8 <- lmer(Reaction_Time ~ 1 + Game_Version + (1 | Player_ID), data = data)
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model8))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Game_Version + (1 | Player_ID)
## Data: data
##
## REML criterion at convergence: 22280.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4201 -0.7329 -0.1042 0.6121 3.5759
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.01211 0.1101
## Residual 0.16394 0.4049
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.80191 0.03720 34.25585 21.554 <2e-16 ***
## Game_Version2 0.09354 0.05260 34.21796 1.778 0.0843 .
## Game_Version3 0.06630 0.05123 34.11126 1.294 0.2043
## Game_Version4 -0.01540 0.05121 34.06945 -0.301 0.7654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Gm_Vr2 Gm_Vr3
## Game_Versn2 -0.707
## Game_Versn3 -0.726 0.514
## Game_Versn4 -0.726 0.514 0.528
#Diagnostic plots
plot_residuals_diagnostics(model8)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model8, n = 500)
plot(residuals_simulation)
# Model diagnostics
check_model(model8)
# Check for influential cases potentially affecting the model
#influence_measures <- influence(model4)
#plot(influence_measures, which = "cook")
# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))
# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model8)
random_effects <- ranef(model8)
#This doesnt work:
#model4 <- lmer(Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 + Phrase_Condition | Player_ID), data = data)
model4 <- lmer(Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID), data = data)
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model4))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID)
## Data: data
##
## REML criterion at convergence: 18852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8743 -0.6916 -0.1071 0.5890 4.2127
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.01441 0.1200
## Residual 0.13924 0.3731
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 6.556e-01 4.202e-02 3.992e+01 15.601
## Phrase_Condition2 1.356e-01 1.812e-02 2.144e+04 7.488
## Phrase_Condition3 -5.123e-02 1.777e-02 2.144e+04 -2.883
## Phrase_Condition4 3.342e-01 1.574e-02 2.145e+04 21.231
## Game_Version2 -1.697e-02 5.927e-02 3.951e+01 -0.286
## Game_Version3 -6.202e-02 5.788e-02 3.980e+01 -1.072
## Game_Version4 -1.793e-02 5.769e-02 3.928e+01 -0.311
## Phrase_Condition2:Game_Version2 1.793e-01 2.505e-02 2.144e+04 7.156
## Phrase_Condition3:Game_Version2 1.668e-01 2.457e-02 2.144e+04 6.791
## Phrase_Condition4:Game_Version2 1.195e-01 2.185e-02 2.145e+04 5.468
## Phrase_Condition2:Game_Version3 1.806e-01 2.481e-02 2.144e+04 7.282
## Phrase_Condition3:Game_Version3 1.939e-01 2.407e-02 2.144e+04 8.055
## Phrase_Condition4:Game_Version3 8.696e-02 2.108e-02 2.145e+04 4.125
## Phrase_Condition2:Game_Version4 -1.333e-02 2.402e-02 2.144e+04 -0.555
## Phrase_Condition3:Game_Version4 4.042e-02 2.350e-02 2.144e+04 1.720
## Phrase_Condition4:Game_Version4 -4.830e-02 2.038e-02 2.145e+04 -2.370
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Phrase_Condition2 7.29e-14 ***
## Phrase_Condition3 0.00395 **
## Phrase_Condition4 < 2e-16 ***
## Game_Version2 0.77611
## Game_Version3 0.29039
## Game_Version4 0.75764
## Phrase_Condition2:Game_Version2 8.55e-13 ***
## Phrase_Condition3:Game_Version2 1.14e-11 ***
## Phrase_Condition4:Game_Version2 4.60e-08 ***
## Phrase_Condition2:Game_Version3 3.40e-13 ***
## Phrase_Condition3:Game_Version3 8.34e-16 ***
## Phrase_Condition4:Game_Version3 3.72e-05 ***
## Phrase_Condition2:Game_Version4 0.57876
## Phrase_Condition3:Game_Version4 0.08539 .
## Phrase_Condition4:Game_Version4 0.01779 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(summary(model4), correlation=TRUE) or
## vcov(summary(model4)) if you need it
#Diagnostic plots
plot_residuals_diagnostics(model4)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model4, n = 500)
plot(residuals_simulation)
# Model diagnostics
check_model(model4)
# Check for influential cases potentially affecting the model
#influence_measures <- influence(model4)
#plot(influence_measures, which = "cook")
# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))
# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model4)
random_effects <- ranef(model4)
# Printing fixed effects for interpretation
print(fixed_effects)
## (Intercept) Phrase_Condition2
## 0.65558253 0.13564920
## Phrase_Condition3 Phrase_Condition4
## -0.05123020 0.33421265
## Game_Version2 Game_Version3
## -0.01697242 -0.06201693
## Game_Version4 Phrase_Condition2:Game_Version2
## -0.01792551 0.17929197
## Phrase_Condition3:Game_Version2 Phrase_Condition4:Game_Version2
## 0.16685111 0.11946684
## Phrase_Condition2:Game_Version3 Phrase_Condition3:Game_Version3
## 0.18063763 0.19392587
## Phrase_Condition4:Game_Version3 Phrase_Condition2:Game_Version4
## 0.08696084 -0.01333338
## Phrase_Condition3:Game_Version4 Phrase_Condition4:Game_Version4
## 0.04042475 -0.04830418
# Printing random effects for interpretation
print(random_effects)
## $Player_ID
## (Intercept)
## 1 -0.162882039
## 2 -0.145521920
## 3 -0.108868536
## 4 0.104632078
## 5 0.133752086
## 6 -0.024278371
## 7 0.042178400
## 8 0.006710037
## 9 0.154278267
## 10 0.041854998
## 11 -0.004540477
## 12 0.068106551
## 13 -0.041911198
## 14 -0.012500741
## 15 0.163653353
## 16 0.080671832
## 17 -0.285579699
## 18 -0.009754619
## 19 -0.060621845
## 20 0.007955007
## 21 0.055070276
## 22 0.023949859
## 23 -0.155227060
## 24 -0.015719218
## 25 0.073204282
## 26 0.143623779
## 27 -0.077192166
## 28 0.004957085
## 29 0.159977433
## 30 -0.015698051
## 31 0.107812884
## 32 0.076202538
## 33 0.076693016
## 34 -0.156064018
## 35 0.185045375
## 36 -0.074621237
## 37 -0.149749872
## 38 -0.209598068
##
## with conditional variances for "Player_ID"
# Estimate marginal means for all combinations
emm1 <- emmeans(model4, specs = ~ Phrase_Condition * Game_Version)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm1)
## Phrase_Condition Game_Version emmean SE df asymp.LCL asymp.UCL
## 1 1 0.656 0.0420 Inf 0.573 0.738
## 2 1 0.791 0.0420 Inf 0.709 0.874
## 3 1 0.604 0.0419 Inf 0.522 0.686
## 4 1 0.990 0.0410 Inf 0.909 1.070
## 1 2 0.639 0.0418 Inf 0.557 0.721
## 2 2 0.954 0.0419 Inf 0.871 1.036
## 3 2 0.754 0.0418 Inf 0.672 0.836
## 4 2 1.092 0.0411 Inf 1.012 1.173
## 1 3 0.594 0.0398 Inf 0.516 0.672
## 2 3 0.910 0.0398 Inf 0.832 0.988
## 3 3 0.736 0.0395 Inf 0.659 0.814
## 4 3 1.015 0.0387 Inf 0.939 1.091
## 1 4 0.638 0.0395 Inf 0.560 0.715
## 2 4 0.760 0.0396 Inf 0.682 0.838
## 3 4 0.627 0.0395 Inf 0.549 0.704
## 4 4 0.924 0.0386 Inf 0.848 0.999
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
# Pairwise comparisons across phrase conditions within each game version
pairs(emm1, simple = "each")
## $`simple contrasts for Phrase_Condition`
## Game_Version = 1:
## contrast estimate SE df z.ratio p.value
## Phrase_Condition1 - Phrase_Condition2 -0.1356 0.0181 Inf -7.488 <.0001
## Phrase_Condition1 - Phrase_Condition3 0.0512 0.0178 Inf 2.883 0.0206
## Phrase_Condition1 - Phrase_Condition4 -0.3342 0.0157 Inf -21.231 <.0001
## Phrase_Condition2 - Phrase_Condition3 0.1869 0.0178 Inf 10.525 <.0001
## Phrase_Condition2 - Phrase_Condition4 -0.1986 0.0157 Inf -12.624 <.0001
## Phrase_Condition3 - Phrase_Condition4 -0.3854 0.0152 Inf -25.283 <.0001
##
## Game_Version = 2:
## contrast estimate SE df z.ratio p.value
## Phrase_Condition1 - Phrase_Condition2 -0.3149 0.0173 Inf -18.200 <.0001
## Phrase_Condition1 - Phrase_Condition3 -0.1156 0.0170 Inf -6.816 <.0001
## Phrase_Condition1 - Phrase_Condition4 -0.4537 0.0152 Inf -29.943 <.0001
## Phrase_Condition2 - Phrase_Condition3 0.1993 0.0172 Inf 11.582 <.0001
## Phrase_Condition2 - Phrase_Condition4 -0.1387 0.0155 Inf -8.965 <.0001
## Phrase_Condition3 - Phrase_Condition4 -0.3381 0.0151 Inf -22.441 <.0001
##
## Game_Version = 3:
## contrast estimate SE df z.ratio p.value
## Phrase_Condition1 - Phrase_Condition2 -0.3163 0.0169 Inf -18.666 <.0001
## Phrase_Condition1 - Phrase_Condition3 -0.1427 0.0162 Inf -8.787 <.0001
## Phrase_Condition1 - Phrase_Condition4 -0.4212 0.0140 Inf -30.036 <.0001
## Phrase_Condition2 - Phrase_Condition3 0.1736 0.0163 Inf 10.660 <.0001
## Phrase_Condition2 - Phrase_Condition4 -0.1049 0.0141 Inf -7.453 <.0001
## Phrase_Condition3 - Phrase_Condition4 -0.2785 0.0131 Inf -21.194 <.0001
##
## Game_Version = 4:
## contrast estimate SE df z.ratio p.value
## Phrase_Condition1 - Phrase_Condition2 -0.1223 0.0158 Inf -7.759 <.0001
## Phrase_Condition1 - Phrase_Condition3 0.0108 0.0154 Inf 0.703 0.8960
## Phrase_Condition1 - Phrase_Condition4 -0.2859 0.0129 Inf -22.089 <.0001
## Phrase_Condition2 - Phrase_Condition3 0.1331 0.0156 Inf 8.523 <.0001
## Phrase_Condition2 - Phrase_Condition4 -0.1636 0.0133 Inf -12.333 <.0001
## Phrase_Condition3 - Phrase_Condition4 -0.2967 0.0126 Inf -23.589 <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 4 estimates
##
## $`simple contrasts for Game_Version`
## Phrase_Condition = 1:
## contrast estimate SE df z.ratio p.value
## Game_Version1 - Game_Version2 0.016972 0.0593 Inf 0.286 0.9918
## Game_Version1 - Game_Version3 0.062017 0.0579 Inf 1.072 0.7069
## Game_Version1 - Game_Version4 0.017926 0.0577 Inf 0.311 0.9896
## Game_Version2 - Game_Version3 0.045045 0.0577 Inf 0.780 0.8634
## Game_Version2 - Game_Version4 0.000953 0.0575 Inf 0.017 1.0000
## Game_Version3 - Game_Version4 -0.044091 0.0561 Inf -0.786 0.8608
##
## Phrase_Condition = 2:
## contrast estimate SE df z.ratio p.value
## Game_Version1 - Game_Version2 -0.162320 0.0593 Inf -2.735 0.0317
## Game_Version1 - Game_Version3 -0.118621 0.0579 Inf -2.049 0.1701
## Game_Version1 - Game_Version4 0.031259 0.0577 Inf 0.541 0.9489
## Game_Version2 - Game_Version3 0.043699 0.0578 Inf 0.756 0.8742
## Game_Version2 - Game_Version4 0.193578 0.0577 Inf 3.357 0.0044
## Game_Version3 - Game_Version4 0.149880 0.0562 Inf 2.668 0.0382
##
## Phrase_Condition = 3:
## contrast estimate SE df z.ratio p.value
## Game_Version1 - Game_Version2 -0.149879 0.0591 Inf -2.534 0.0548
## Game_Version1 - Game_Version3 -0.131909 0.0576 Inf -2.291 0.1001
## Game_Version1 - Game_Version4 -0.022499 0.0575 Inf -0.391 0.9797
## Game_Version2 - Game_Version3 0.017970 0.0575 Inf 0.313 0.9894
## Game_Version2 - Game_Version4 0.127379 0.0575 Inf 2.217 0.1187
## Game_Version3 - Game_Version4 0.109410 0.0559 Inf 1.959 0.2038
##
## Phrase_Condition = 4:
## contrast estimate SE df z.ratio p.value
## Game_Version1 - Game_Version2 -0.102494 0.0581 Inf -1.766 0.2900
## Game_Version1 - Game_Version3 -0.024944 0.0564 Inf -0.442 0.9711
## Game_Version1 - Game_Version4 0.066230 0.0563 Inf 1.175 0.6424
## Game_Version2 - Game_Version3 0.077551 0.0564 Inf 1.375 0.5150
## Game_Version2 - Game_Version4 0.168724 0.0564 Inf 2.994 0.0146
## Game_Version3 - Game_Version4 0.091174 0.0546 Inf 1.669 0.3404
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 4 estimates
plot(emm1)
1 (Intercept): Represents the baseline reaction time for the first question and the baseline categories of Phrase_Condition and Game_Version.
Question_Num: Continuous variable representing the sequential number of questions. A negative coefficient indicates improved reaction time (i.e., decreasing reaction time) as the game progresses.
Phrase_Condition * Game_Version: Examines if the effect of phrase conditions on reaction time is modified by the game version, identifying combinations where game version influences the difficulty of phrase conditions.
(1 | Player_ID): Random intercepts for players account for individual differences in baseline reaction times, acknowledging that some players generally respond faster or slower than others, independent of condition or version.
model5 <- lmer(Reaction_Time ~ 1 + Question_Num + Phrase_Condition * Game_Version + (1 | Player_ID), data = data)
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model5))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Question_Num + Phrase_Condition * Game_Version +
## (1 | Player_ID)
## Data: data
##
## REML criterion at convergence: 18849.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8973 -0.6912 -0.1075 0.5843 4.1400
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.01431 0.1196
## Residual 0.13911 0.3730
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 6.381e-01 4.205e-02 4.061e+01 15.174
## Question_Num 1.384e-04 2.986e-05 2.144e+04 4.636
## Phrase_Condition2 1.325e-01 1.812e-02 2.144e+04 7.309
## Phrase_Condition3 -6.598e-02 1.805e-02 2.144e+04 -3.656
## Phrase_Condition4 3.188e-01 1.608e-02 2.145e+04 19.827
## Game_Version2 -3.516e-02 5.921e-02 3.989e+01 -0.594
## Game_Version3 -7.726e-02 5.778e-02 4.010e+01 -1.337
## Game_Version4 -2.110e-02 5.750e-02 3.932e+01 -0.367
## Phrase_Condition2:Game_Version2 2.003e-01 2.545e-02 2.144e+04 7.871
## Phrase_Condition3:Game_Version2 1.971e-01 2.541e-02 2.144e+04 7.757
## Phrase_Condition4:Game_Version2 1.424e-01 2.239e-02 2.145e+04 6.360
## Phrase_Condition2:Game_Version3 1.808e-01 2.479e-02 2.144e+04 7.293
## Phrase_Condition3:Game_Version3 2.244e-01 2.494e-02 2.144e+04 8.995
## Phrase_Condition4:Game_Version3 1.209e-01 2.231e-02 2.145e+04 5.421
## Phrase_Condition2:Game_Version4 -2.168e-02 2.407e-02 2.144e+04 -0.901
## Phrase_Condition3:Game_Version4 4.028e-02 2.349e-02 2.144e+04 1.715
## Phrase_Condition4:Game_Version4 -2.144e-02 2.118e-02 2.145e+04 -1.012
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Question_Num 3.58e-06 ***
## Phrase_Condition2 2.78e-13 ***
## Phrase_Condition3 0.000257 ***
## Phrase_Condition4 < 2e-16 ***
## Game_Version2 0.555975
## Game_Version3 0.188727
## Game_Version4 0.715573
## Phrase_Condition2:Game_Version2 3.68e-15 ***
## Phrase_Condition3:Game_Version2 9.06e-15 ***
## Phrase_Condition4:Game_Version2 2.06e-10 ***
## Phrase_Condition2:Game_Version3 3.13e-13 ***
## Phrase_Condition3:Game_Version3 < 2e-16 ***
## Phrase_Condition4:Game_Version3 6.00e-08 ***
## Phrase_Condition2:Game_Version4 0.367788
## Phrase_Condition3:Game_Version4 0.086359 .
## Phrase_Condition4:Game_Version4 0.311457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 17 > 12.
## Use print(summary(model5), correlation=TRUE) or
## vcov(summary(model5)) if you need it
#Diagnostic plots
plot_residuals_diagnostics(model5)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model5, n = 500)
plot(residuals_simulation)
# Model diagnostics
check_model(model5)
# Check for influential cases potentially affecting the model
#influence_measures <- influence(model5)
#plot(influence_measures, which = "cook")
# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))
# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model5)
random_effects <- ranef(model5)
# Printing fixed effects for interpretation
print(fixed_effects)
## (Intercept) Question_Num
## 0.6381127707 0.0001384314
## Phrase_Condition2 Phrase_Condition3
## 0.1324504693 -0.0659754779
## Phrase_Condition4 Game_Version2
## 0.3188284767 -0.0351594821
## Game_Version3 Game_Version4
## -0.0772602602 -0.0211035151
## Phrase_Condition2:Game_Version2 Phrase_Condition3:Game_Version2
## 0.2003138616 0.1971203306
## Phrase_Condition4:Game_Version2 Phrase_Condition2:Game_Version3
## 0.1424237445 0.1808295797
## Phrase_Condition3:Game_Version3 Phrase_Condition4:Game_Version3
## 0.2243767225 0.1209361948
## Phrase_Condition2:Game_Version4 Phrase_Condition3:Game_Version4
## -0.0216796365 0.0402806219
## Phrase_Condition4:Game_Version4
## -0.0214371040
# Printing random effects for interpretation
print(random_effects)
## $Player_ID
## (Intercept)
## 1 -0.163105853
## 2 -0.144771169
## 3 -0.109213499
## 4 0.104317327
## 5 0.133742164
## 6 -0.023698190
## 7 0.041663737
## 8 0.006680579
## 9 0.154384903
## 10 0.041320520
## 11 -0.005030095
## 12 0.068213979
## 13 -0.042275508
## 14 -0.012387106
## 15 0.163320561
## 16 0.081750306
## 17 -0.284207392
## 18 -0.010705265
## 19 -0.058482394
## 20 0.008489376
## 21 0.055441845
## 22 0.021989135
## 23 -0.156603283
## 24 -0.014701342
## 25 0.073543785
## 26 0.143249845
## 27 -0.077776167
## 28 0.004849200
## 29 0.158797420
## 30 -0.015243028
## 31 0.108024919
## 32 0.076923790
## 33 0.075534000
## 34 -0.155164868
## 35 0.181760464
## 36 -0.073866835
## 37 -0.146811014
## 38 -0.209954848
##
## with conditional variances for "Player_ID"
# Estimate marginal means at various levels of Question_Num
emm2 <- emmeans(model5, specs = ~ Phrase_Condition * Game_Version, at = list(Question_Num = c(1, 10, 20)))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm2)
## Phrase_Condition Game_Version emmean SE df asymp.LCL asymp.UCL
## 1 1 0.640 0.0420 Inf 0.557 0.722
## 2 1 0.772 0.0421 Inf 0.690 0.854
## 3 1 0.574 0.0423 Inf 0.491 0.656
## 4 1 0.958 0.0415 Inf 0.877 1.040
## 1 2 0.604 0.0423 Inf 0.521 0.687
## 2 2 0.937 0.0419 Inf 0.855 1.019
## 3 2 0.736 0.0418 Inf 0.654 0.818
## 4 2 1.066 0.0413 Inf 0.985 1.147
## 1 3 0.562 0.0402 Inf 0.483 0.641
## 2 3 0.876 0.0404 Inf 0.796 0.955
## 3 3 0.721 0.0395 Inf 0.643 0.798
## 4 3 1.002 0.0386 Inf 0.926 1.078
## 1 4 0.618 0.0396 Inf 0.541 0.696
## 2 4 0.729 0.0400 Inf 0.651 0.808
## 3 4 0.593 0.0400 Inf 0.514 0.671
## 4 4 0.916 0.0385 Inf 0.840 0.991
##
## Results are averaged over the levels of: Question_Num
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
# Trend analysis
trend_analysis <- emtrends(model5, specs = ~ Phrase_Condition * Game_Version, var = "Question_Num")
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(trend_analysis)
## Phrase_Condition Game_Version Question_Num.trend SE df asymp.LCL
## 1 1 0.000138 2.99e-05 Inf 7.99e-05
## 2 1 0.000138 2.99e-05 Inf 7.99e-05
## 3 1 0.000138 2.99e-05 Inf 7.99e-05
## 4 1 0.000138 2.99e-05 Inf 7.99e-05
## 1 2 0.000138 2.99e-05 Inf 7.99e-05
## 2 2 0.000138 2.99e-05 Inf 7.99e-05
## 3 2 0.000138 2.99e-05 Inf 7.99e-05
## 4 2 0.000138 2.99e-05 Inf 7.99e-05
## 1 3 0.000138 2.99e-05 Inf 7.99e-05
## 2 3 0.000138 2.99e-05 Inf 7.99e-05
## 3 3 0.000138 2.99e-05 Inf 7.99e-05
## 4 3 0.000138 2.99e-05 Inf 7.99e-05
## 1 4 0.000138 2.99e-05 Inf 7.99e-05
## 2 4 0.000138 2.99e-05 Inf 7.99e-05
## 3 4 0.000138 2.99e-05 Inf 7.99e-05
## 4 4 0.000138 2.99e-05 Inf 7.99e-05
## asymp.UCL
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
## 0.000197
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
plot(emm2)
# Define the function to plot diagnostics for logistic mixed-effects models
plotModelDiagnostics <- function(model, data) {
# Deviance Residuals Plot
dev_res <- resid(model, type = "deviance")
plot(dev_res, ylim = c(min(dev_res), max(dev_res)), main = "Deviance Residuals")
abline(h = 0, col = "red")
# ROC Curve and AUC calculation
probabilities <- predict(model, type = "response")
actual <- data$Answer
pred <- prediction(probabilities, actual)
perf <- performance(pred, "tpr", "fpr")
# Plot ROC Curve
plot(perf, main = "ROC Curve")
abline(0, 1, col = "blue") # Adding diagonal line for reference
# Calculate AUC and display it
auc <- performance(pred, "auc")
auc_value <- auc@y.values[[1]]
cat("Area Under the Curve (AUC):", auc_value, "\n")
# Simple plot just showing AUC value
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", main = "AUC Value")
text(1, 1, labels = paste("AUC =", round(auc_value, 3)), cex = 2)
}
# Example of using the function (you should already have a model fitted)
# plotModelDiagnostics(model1, data)
Accounting for variability among participants (participants as a random effect) Oa: ac ~ 1 + (1 | participant)
# Fit a basic logistic mixed-effects model
model0 <- glmer(Answer ~ 1 + (1 | Player_ID), data=data, family=binomial)
summary(model0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ 1 + (1 | Player_ID)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 19426.2 19442.2 -9711.1 19422.2 21486
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3308 0.3398 0.4080 0.4852 0.6156
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.147 0.3835
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.69897 0.06555 25.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plotModelDiagnostics(model0, data)
## Area Under the Curve (AUC): 0.6125971
##Does learning take place?
1: ac - 1 + time + (1 + time | participant)
control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model1 <- glmer(Answer ~ 1 + Question_Num + (1 + Question_Num | Player_ID), data = data, family = binomial, control = control)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0920642 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
summary(model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ 1 + Question_Num + (1 + Question_Num | Player_ID)
## Data: data
## Control: control
##
## AIC BIC logLik deviance df.resid
## 19072.6 19112.4 -9531.3 19062.6 21483
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2920 0.2952 0.3987 0.4907 0.8663
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Player_ID (Intercept) 4.367e-01 0.660848
## Question_Num 9.453e-06 0.003075 -0.85
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.4407382 0.1145751 12.575 < 2e-16 ***
## Question_Num 0.0021346 0.0005408 3.947 7.91e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Question_Nm -0.843
## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0920642 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
plotModelDiagnostics(model1, data)
## Area Under the Curve (AUC): 0.6584369
2: ac - 1 + condition + (1 + condition | participant)
control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model2 <- glmer(Answer ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID), data = data, family = binomial, control = control)
# Output the summary of the model to see the results
summary(model2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID)
## Data: data
## Control: control
##
## AIC BIC logLik deviance df.resid
## 17124.4 17236.1 -8548.2 17096.4 21474
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.6838 0.1302 0.2550 0.6043 0.8368
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Player_ID (Intercept) 0.6505 0.8065
## Phrase_Condition2 0.6693 0.8181 -0.71
## Phrase_Condition3 0.5456 0.7387 -0.10 0.07
## Phrase_Condition4 0.7101 0.8427 -0.95 0.78 0.27
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.6326 0.1716 21.169 < 2e-16 ***
## Phrase_Condition2 -1.0569 0.1862 -5.677 1.37e-08 ***
## Phrase_Condition3 -0.6074 0.1870 -3.248 0.00116 **
## Phrase_Condition4 -2.7371 0.1777 -15.404 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Phr_C2 Phr_C3
## Phrs_Cndtn2 -0.772
## Phrs_Cndtn3 -0.435 0.387
## Phrs_Cndtn4 -0.961 0.797 0.502
plotModelDiagnostics(model2, data)
## Area Under the Curve (AUC): 0.7628776
##Does the version of the experiment cause different outcomes?
ac - 1 + version + ((1 | participant)
control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model3 <- glmer(Answer ~ 1 + Game_Version + (1 | Player_ID), data = data, family = binomial, control = control)
summary(model3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ 1 + Game_Version + (1 | Player_ID)
## Data: data
## Control: control
##
## AIC BIC logLik deviance df.resid
## 19427.4 19467.3 -9708.7 19417.4 21483
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2821 0.3402 0.4049 0.4857 0.6153
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.1276 0.3573
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9312 0.1276 15.131 <2e-16 ***
## Game_Version2 -0.2351 0.1796 -1.309 0.1905
## Game_Version3 -0.3804 0.1739 -2.187 0.0288 *
## Game_Version4 -0.2899 0.1742 -1.664 0.0961 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Gm_Vr2 Gm_Vr3
## Game_Versn2 -0.710
## Game_Versn3 -0.732 0.520
## Game_Versn4 -0.731 0.520 0.536
plotModelDiagnostics(model3, data)
## Area Under the Curve (AUC): 0.6125669
##Hoes does the effect of condition vary depending on the version?
ac - 1 + condition * version + (1 + condition | participant)
control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model4 <- glmer(Answer ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID), data = data, family = binomial, control = control)
summary(model4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID)
## Data: data
## Control: control
##
## AIC BIC logLik deviance df.resid
## 17377.0 17512.6 -8671.5 17343.0 21471
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.3529 0.1829 0.2815 0.5604 0.8339
##
## Random effects:
## Groups Name Variance Std.Dev.
## Player_ID (Intercept) 0.08701 0.295
## Number of obs: 21488, groups: Player_ID, 38
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.29538 0.20492 16.081 < 2e-16 ***
## Phrase_Condition2 -0.58366 0.22782 -2.562 0.010409 *
## Phrase_Condition3 -0.02402 0.24690 -0.097 0.922491
## Phrase_Condition4 -2.23561 0.18820 -11.879 < 2e-16 ***
## Game_Version2 -0.17268 0.27494 -0.628 0.529975
## Game_Version3 0.26339 0.29539 0.892 0.372568
## Game_Version4 0.03719 0.27202 0.137 0.891265
## Phrase_Condition2:Game_Version2 -0.09807 0.29960 -0.327 0.743405
## Phrase_Condition3:Game_Version2 -1.14246 0.30586 -3.735 0.000188 ***
## Phrase_Condition4:Game_Version2 0.05288 0.24940 0.212 0.832088
## Phrase_Condition2:Game_Version3 -0.56761 0.31927 -1.778 0.075436 .
## Phrase_Condition3:Game_Version3 -1.18155 0.32808 -3.601 0.000316 ***
## Phrase_Condition4:Game_Version3 -0.50828 0.27148 -1.872 0.061175 .
## Phrase_Condition2:Game_Version4 -0.50645 0.29157 -1.737 0.082398 .
## Phrase_Condition3:Game_Version4 -0.72037 0.30843 -2.336 0.019512 *
## Phrase_Condition4:Game_Version4 -0.16336 0.24556 -0.665 0.505889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
plotModelDiagnostics(model4, data)
## Area Under the Curve (AUC): 0.7478004